#------------------------------------------------------------------------------#
#
#                 _         _    _      _                _
#                (_)       | |  | |    | |              | |
#   _ __    ___   _  _ __  | |_ | |__  | |  __ _  _ __  | | __
#  | '_ \  / _ \ | || '_ \ | __|| '_ \ | | / _` || '_ \ | |/ /
#  | |_) || (_) || || | | || |_ | |_) || || (_| || | | ||   <
#  | .__/  \___/ |_||_| |_| \__||_.__/ |_| \__,_||_| |_||_|\_\
#  | |
#  |_|
#
#  This file is part of the 'rstudio/pointblank' project.
#
#  Copyright (c) 2017-2025 pointblank authors
#
#  For full copyright and license information, please look at
#  https://rstudio.github.io/pointblank/LICENSE.html
#
#------------------------------------------------------------------------------#


#' Execute all agent and informant YAML tasks
#'
#' @description
#'
#' The `yaml_exec()` function takes all relevant **pointblank** YAML files in a
#' directory and executes them. Execution involves interrogation of agents for
#' YAML agents and incorporation of informants for YAML informants. Under the
#' hood, this uses [yaml_agent_interrogate()] and [yaml_informant_incorporate()]
#' and then [x_write_disk()] to save the processed objects to an output
#' directory. These written artifacts can be read in at any later time with the
#' [x_read_disk()] function or the [read_disk_multiagent()] function. This is
#' useful when data in the target tables are changing and the periodic testing
#' of such tables is part of a data quality monitoring plan.
#'
#' The output RDS files are named according to the object type processed, the
#' target table, and the date-time of processing. For convenience and
#' modularity, this setup is ideal when a table store YAML file (typically named
#' `"tbl_store.yml"` and produced via the [tbl_store()] and [yaml_write()]
#' workflow) is available in the directory, and when table-prep formulas are
#' accessed by name through [tbl_source()].
#'
#' A typical directory of files set up for execution in this way might have the
#' following contents:
#'
#' - a `"tbl_store.yml"` file for holding table-prep formulas (created with
#' [tbl_store()] and written to YAML with [yaml_write()])
#' - one or more YAML *agent* files to validate tables (ideally using
#' [tbl_source()])
#' - one or more YAML *informant* files to provide refreshed metadata on tables
#' (again, using [tbl_source()] to reference table preparations is ideal)
#' - an output folder (default is `"output"`) to save serialized versions of
#' processed agents and informants
#'
#' Minimal example files of the aforementioned types can be found in the
#' **pointblank** package through the following `system.file()` calls:
#'
#' - `system.file("yaml", "agent-small_table.yml", package = "pointblank")`
#' - `system.file("yaml", "informant-small_table.yml", package = "pointblank")`
#' - `system.file("yaml", "tbl_store.yml", package = "pointblank")`
#'
#' The directory itself can be accessed using `system.file("yaml", package =
#' "pointblank")`.
#'
#' @param path The path that contains the YAML files for agents and informants.
#'
#' @param files A vector of YAML files to use in the execution workflow. By
#'   default, `yaml_exec()` will attempt to process every valid YAML file in
#'   `path` but supplying a vector here limits the scope to the specified files.
#'
#' @param write_to_disk Should the execution workflow include a step that writes
#'   output files to disk? This internally calls [x_write_disk()] to write RDS
#'   files and uses the base filename of the agent/informant YAML file as part
#'   of the output filename, appending the date-time to the basename.
#'
#' @param output_path The output path for any generated output files. By
#'   default, this will be a subdirectory of the provided `path` called
#'   `"output"`.
#'
#' @param keep_tbl,keep_extracts For agents, the table may be kept if it is a
#'   data frame object (database tables will never be pulled for storage) and
#'   *extracts*, collections of table rows that failed a validation step, may
#'   also be stored. By default, both of these options are set to `FALSE`.
#'
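#' @return Invisibly returns a named vector of file paths for the input files
#'   that were processed; file output paths (for wherever writing occurred) are
#'   given as the names. If none of the candidate YAML files is recognized as
#'   an agent or an informant, `NULL` is returned invisibly.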
#'
#' @examples
#' if (interactive()) {
#'
#' # The 'yaml' directory that is
#' # accessible in the package through
#' # `system.file()` contains the files
#' # 1. `agent-small_table.yml`
#' # 2. `informant-small_table.yml`
#' # 3. `tbl_store.yml`
#'
#' # There are references in YAML files
#' # 1 & 2 to the table store YAML file,
#' # so, they all work together cohesively
#'
#' # Let's process the agent and the
#' # informant YAML files with `yaml_exec()`;
#' # and we'll specify the working directory
#' # as the place where the output RDS files
#' # are written
#'
#' output_dir <- getwd()
#'
#' yaml_exec(
#'   path = system.file(
#'     "yaml", package = "pointblank"
#'   ),
#'   output_path = output_dir
#' )
#'
#' # This generates two RDS files in the
#' # working directory: one for the agent
#' # and the other for the informant; each
#' # of them is automatically time-stamped
#' # so that periodic execution can be
#' # safely carried out without risk of
#' # overwriting
#'
#' }
#'
#' @family pointblank YAML
#' @section Function ID:
#' 11-8
#'
#' @export
yaml_exec <- function(
    path = NULL,
    files = NULL,
    write_to_disk = TRUE,
    output_path = file.path(path, "output"),
    keep_tbl = FALSE,
    keep_extracts = FALSE
) {

  # Record the caller's working directory up front. It is needed for
  # restoring the working directory on exit and for resolving a relative
  # (or `NULL`) `output_path`, whether or not `path` was supplied.
  initial_wd <- fs::path_abs(fs::path_wd())

  # If `path` isn't provided then the working directory
  # is the path containing the input files
  if (is.null(path)) {
    path <- initial_wd
  } else {

    wd_path <- fs::path_abs(path)

    if (!fs::dir_exists(wd_path)) {
      stop(
        "The `path` provided (", as.character(wd_path), ") does not exist.",
        call. = FALSE
      )
    }

    # Temporarily switch to `path` so that relative references are
    # resolved against it; the original directory is restored on exit
    if (initial_wd != wd_path) {
      setwd(path)
      on.exit(setwd(initial_wd), add = TRUE)
    }

    path <- wd_path
  }

  # Construct paths to files; when `files` isn't given, every file in the
  # (now current) directory with a `.yml` or `.yaml` extension is a candidate
  if (!is.null(files)) {
    files_paths <- fs::path(fs::path_wd(), files)
  } else {
    files_paths <- fs::path(fs::path_wd(), fs::dir_ls(regexp = "\\.ya?ml$"))
  }

  # Normalize the output path. The default for `output_path` is evaluated
  # lazily, so it is forced here, after `path` has been made absolute; a
  # relative `output_path` is resolved against the initial working directory
  if (is.null(output_path)) {
    output_path <- fs::path_norm(initial_wd)
  } else {
    if (!fs::is_absolute_path(output_path)) {
      output_path <- fs::path_norm(fs::path(initial_wd, output_path))
    }
  }

  # Read each candidate file once and keep only its top-level key names,
  # which is all that's needed to tell agents and informants apart
  files_paths <- as.character(files_paths)

  yaml_keys <-
    lapply(
      files_paths,
      function(file_path) names(yaml::read_yaml(file_path))
    )

  has_all_keys <- function(required) {
    vapply(
      yaml_keys,
      function(keys) all(required %in% keys),
      logical(1)
    )
  }

  # Determine which of the files are agents and which are informants
  agent_file_paths <-
    files_paths[has_all_keys(c("tbl", "tbl_name", "locale", "steps"))]
  informant_file_paths <-
    files_paths[has_all_keys(c("table", "columns"))]

  # Get the total number of files that are candidates for agents/informants
  total_files <- length(agent_file_paths) + length(informant_file_paths)

  # If there are no files to process, invisibly return NULL
  if (total_files == 0) {
    return(invisible(NULL))
  }

  # Create vectors for collecting files that were written and also read in
  files_written <- character(0)
  files_read <- character(0)

  # The `{total_files}` placeholder is left for `cli` to interpolate
  if (total_files == 1) {
    execution_progress_header <-
      "Execution Started - there is a single file to process"
  } else {
    execution_progress_header <-
      "Execution Started - there are {total_files} files to process"
  }

  cli::cli_h1(execution_progress_header)
  cli::cli_text()

  # Helper: write a processed object to `output_path` as an RDS file named
  # after the YAML file's basename; `...` is forwarded to `x_write_disk()`.
  # Returns the output path *without* the date-time affix.
  # NOTE(review): the file handed to `x_write_disk()` carries the affix from
  # `affix_datetime()`, so the returned path presumably differs from the
  # file actually on disk -- kept as-is for backward compatibility; confirm
  # whether callers would prefer the affixed path.
  write_rds_output <- function(x, yml_file, ...) {

    fs::dir_create(path = output_path)

    # Construct the filename/path for the output RDS file
    file_name <-
      fs::path(
        output_path,
        paste0(basename(tools::file_path_sans_ext(yml_file)), ".rds")
      )

    # Write the file to disk, affixing the date and time to
    # the filename for ease of parsing by other functions
    x_write_disk(
      x = x,
      filename = affix_datetime(
        filename = file_name,
        delimiter = "-"
      ),
      ...
    )

    as.character(file_name)
  }

  # Interrogate every agent; an empty string is recorded as the
  # output path when nothing is written to disk
  for (agent_yml_file in agent_file_paths) {

    cli::cli_rule(left = basename(agent_yml_file))

    agent <- yaml_agent_interrogate(agent_yml_file)

    cli::cli_text()

    if (write_to_disk) {
      written_path <-
        write_rds_output(
          agent,
          agent_yml_file,
          keep_tbl = keep_tbl,
          keep_extracts = keep_extracts
        )
    } else {
      written_path <- ""
    }

    files_written <- c(files_written, written_path)
    files_read <- c(files_read, agent_yml_file)

    cli::cli_rule()
    cli::cli_text()
  }

  # Incorporate every informant, recording outputs in the same way
  for (informant_yml_file in informant_file_paths) {

    cli::cli_rule(left = basename(informant_yml_file))

    informant <- yaml_informant_incorporate(informant_yml_file)

    cli::cli_text()

    if (write_to_disk) {
      written_path <- write_rds_output(informant, informant_yml_file)
    } else {
      written_path <- ""
    }

    files_written <- c(files_written, written_path)
    files_read <- c(files_read, informant_yml_file)

    cli::cli_rule()
  }

  cli::cli_h1("Execution Finished")

  # Input file paths are the values; output paths (or "") are the names
  files_in_out <- files_read
  names(files_in_out) <- files_written

  invisible(files_in_out)
}
